read data
# Load the NHIS extract from CSV and pass it through janitor::clean_names()
# (the pipelines below refer to the columns by lower-case names such as
# `year` and `worrx`).
nhis <- janitor::clean_names(read_csv("data/nhis_data01.csv"))
## Rows: 468212 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): NHISHID, NHISPID, HHX, FMX, PX
## dbl (28): YEAR, SERIAL, STRATA, PSU, HHWEIGHT, PERNUM, PERWEIGHT, SAMPWEIGHT...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Number of people who reported taking medication for worried, nervous, or anxious feelings from 2015 to 2021.
# Bar chart of the yearly percentage of respondents who answered "yes" to
# `worrx` (medication for worried, nervous, or anxious feelings), among those
# who answered either "no" (code 1) or "yes" (code 2), for years >= 2015.
nhis %>%
  filter(
    year >= 2015,
    worrx %in% c(1, 2)
  ) %>%
  mutate(
    worrx = recode_factor(worrx, "1" = "no", "2" = "yes")
  ) %>%
  group_by(year, worrx) %>%
  # Spell out the grouping of the result (still grouped by year) instead of
  # relying on the default, which emits an informational message.
  summarize(wor_num = n(), .groups = "drop_last") %>%
  pivot_wider(
    names_from = worrx,
    values_from = wor_num,
    # A year with no rows in one category gets a count of 0 rather than NA,
    # so the percentage and the hover label stay well defined.
    values_fill = 0L
  ) %>%
  mutate(
    wor_percentage = yes / (no + yes) * 100,
    # Hover text: "<yes count> out of <valid answers>".
    text_label = str_c(yes, " out of ", no + yes)
  ) %>%
  plot_ly(
    y = ~wor_percentage,
    x = ~year,
    color = ~year,
    type = "bar",
    colors = "viridis",
    text = ~text_label
  ) %>%
  layout(
    title = "Percentage of people reported taken medication for worried, nervous, or anxious feelings each year",
    xaxis = list(title = ""),
    yaxis = list(title = "Percentage")
  ) %>%
  # Colour only mirrors the x axis (year), so the colour bar is hidden.
  hide_colorbar()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
Frequency of worries
# Stacked bar chart of how the `worfreq` answers are distributed within each
# survey year (years >= 2015), expressed as a percentage of that year's
# retained answers.
nhis %>%
  filter(
    year >= 2015,
    # Keep codes strictly between 0 and 7; codes 1-5 are labelled below.
    worfreq > 0, worfreq < 7
  ) %>%
  mutate(
    worfreq = recode_factor(
      worfreq,
      "1" = "Daily",
      "2" = "Weekly",
      "3" = "Monthly",
      "4" = "A few times a year",
      "5" = "Never"
    )
  ) %>%
  group_by(year, worfreq) %>%
  # One row per (year, answer); the result stays grouped by year so the
  # per-year totals below are computed within each year.
  summarize(count = n(), .groups = "drop_last") %>%
  # A grouped mutate() replaces the former multi-row summarize(), which is
  # deprecated since dplyr 1.1.0; the computed values are the same.
  mutate(
    sum_count = sum(count),
    percentage = 100 * count / sum_count,
    # Hover text: "<answer count> out of <year total>".
    text_label = str_c(count, " out of ", sum_count)
  ) %>%
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~worfreq,
    type = "bar",
    colors = "viridis",
    text = ~text_label
  ) %>%
  layout(
    title = "Distribution of frequency of worries",
    xaxis = list(title = ""),
    yaxis = list(title = "Percentage"),
    barmode = "stack",
    legend = list(orientation = "h")
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
worry level distribution.
# Stacked bar chart of how the `worfeelevl` answers are distributed within
# each survey year (years >= 2015), expressed as a percentage of that year's
# retained answers.
nhis %>%
  filter(
    year >= 2015,
    # Keep codes strictly between 0 and 7; codes 1-3 are labelled below.
    worfeelevl > 0, worfeelevl < 7
  ) %>%
  mutate(
    # Levels are listed as 1, 3, 2 so the factor (and therefore the stacking
    # and legend order) runs "A lot" -> in between -> "A little".
    worfeelevl = recode_factor(
      worfeelevl,
      "1" = "A lot",
      "3" = "Somewhere between a little and a lot",
      "2" = "A little"
    )
  ) %>%
  group_by(year, worfeelevl) %>%
  # One row per (year, answer); the result stays grouped by year so the
  # per-year totals below are computed within each year.
  summarize(count = n(), .groups = "drop_last") %>%
  # A grouped mutate() replaces the former multi-row summarize(), which is
  # deprecated since dplyr 1.1.0; the computed values are the same.
  mutate(
    sum_count = sum(count),
    percentage = 100 * count / sum_count,
    # Hover text: "<answer count> out of <year total>".
    text_label = str_c(count, " out of ", sum_count)
  ) %>%
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~worfeelevl,
    type = "bar",
    colors = "viridis",
    text = ~text_label
  ) %>%
  layout(
    title = "Distribution of worry levels",
    xaxis = list(title = ""),
    yaxis = list(title = "Percentage"),
    barmode = "stack",
    legend = list(orientation = "h")
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
Number of people who reported taking medication for depression from 2015 to 2021.
# Bar chart of the yearly percentage of respondents who answered "yes" to
# `deprx` (medication for depression), among those who answered either
# "no" (code 1) or "yes" (code 2), for years >= 2015.
nhis %>%
  filter(
    year >= 2015,
    deprx %in% c(1, 2)
  ) %>%
  mutate(
    deprx = recode_factor(deprx, "1" = "no", "2" = "yes")
  ) %>%
  group_by(year, deprx) %>%
  # Spell out the grouping of the result (still grouped by year) instead of
  # relying on the default, which emits an informational message.
  summarize(dep_num = n(), .groups = "drop_last") %>%
  pivot_wider(
    names_from = deprx,
    values_from = dep_num,
    # A year with no rows in one category gets a count of 0 rather than NA,
    # so the percentage and the hover label stay well defined.
    values_fill = 0L
  ) %>%
  mutate(
    dep_percentage = yes / (no + yes) * 100,
    # Hover text: "<yes count> out of <valid answers>".
    text_label = str_c(yes, " out of ", no + yes)
  ) %>%
  plot_ly(
    y = ~dep_percentage,
    x = ~year,
    color = ~year,
    type = "bar",
    colors = "viridis",
    text = ~text_label
  ) %>%
  layout(
    title = "Percentage of people reported taken medication for depression each year",
    xaxis = list(title = ""),
    yaxis = list(title = "Percentage")
  ) %>%
  # Colour only mirrors the x axis (year), so the colour bar is hidden.
  hide_colorbar()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
Frequency of feeling depressed
# Stacked bar chart of how the `depfreq` answers are distributed within each
# survey year (years >= 2015), expressed as a percentage of that year's
# retained answers.
nhis %>%
  filter(
    year >= 2015,
    # Keep codes strictly between 0 and 7; codes 1-5 are labelled below.
    depfreq > 0, depfreq < 7
  ) %>%
  mutate(
    depfreq = recode_factor(
      depfreq,
      "1" = "Daily",
      "2" = "Weekly",
      "3" = "Monthly",
      "4" = "A few times a year",
      "5" = "Never"
    )
  ) %>%
  group_by(year, depfreq) %>%
  # One row per (year, answer); the result stays grouped by year so the
  # per-year totals below are computed within each year.
  summarize(count = n(), .groups = "drop_last") %>%
  # A grouped mutate() replaces the former multi-row summarize(), which is
  # deprecated since dplyr 1.1.0; the computed values are the same.
  mutate(
    sum_count = sum(count),
    percentage = 100 * count / sum_count,
    # Hover text: "<answer count> out of <year total>".
    text_label = str_c(count, " out of ", sum_count)
  ) %>%
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~depfreq,
    type = "bar",
    colors = "viridis",
    text = ~text_label
  ) %>%
  layout(
    title = "Distribution of frequency of feel depressed",
    xaxis = list(title = ""),
    yaxis = list(title = "Percentage"),
    barmode = "stack",
    legend = list(orientation = "h")
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
depression level distribution.
# Stacked bar chart of how the `depfeelevl` answers are distributed within
# each survey year (years >= 2015), expressed as a percentage of that year's
# retained answers.
nhis %>%
  filter(
    year >= 2015,
    # Keep codes strictly between 0 and 7; codes 1-3 are labelled below.
    depfeelevl > 0, depfeelevl < 7
  ) %>%
  mutate(
    # Levels are listed as 1, 3, 2 so the factor (and therefore the stacking
    # and legend order) runs "A lot" -> in between -> "A little".
    depfeelevl = recode_factor(
      depfeelevl,
      "1" = "A lot",
      "3" = "Somewhere between a little and a lot",
      "2" = "A little"
    )
  ) %>%
  group_by(year, depfeelevl) %>%
  # One row per (year, answer); the result stays grouped by year so the
  # per-year totals below are computed within each year.
  summarize(count = n(), .groups = "drop_last") %>%
  # A grouped mutate() replaces the former multi-row summarize(), which is
  # deprecated since dplyr 1.1.0; the computed values are the same.
  mutate(
    sum_count = sum(count),
    percentage = 100 * count / sum_count,
    # Hover text: "<answer count> out of <year total>".
    text_label = str_c(count, " out of ", sum_count)
  ) %>%
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~depfeelevl,
    type = "bar",
    colors = "viridis",
    text = ~text_label
  ) %>%
  layout(
    title = "Distribution of depression levels",
    xaxis = list(title = ""),
    yaxis = list(title = "Percentage"),
    barmode = "stack",
    legend = list(orientation = "h")
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.